// balxml_validatingreader.h                                          -*-C++-*-
#ifndef INCLUDED_BALXML_VALIDATINGREADER
#define INCLUDED_BALXML_VALIDATINGREADER

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide a common reader protocol for parsing and validating XML.
//
//@CLASSES:
//  balxml::ValidatingReader: reader protocol for parsing and validating XML
//
//@SEE_ALSO: balxml_reader
//
//@DESCRIPTION: This component provides an abstract class,
// `balxml::ValidatingReader`, an XML reader that provides data validation
// against DTD and/or XML Schemas (XSD).  `balxml::ValidatingReader` inherits
// from the `balxml::Reader` interface and is therefore fully compliant with
// it.  In addition, `balxml::ValidatingReader` provides methods to control
// the validation.  The `enableValidation` method specifies whether the reader
// should perform validation.  Setting `validationFlag` to `false` produces a
// non-validating reader.  Setting it to `true` forces the reader to perform
// the validation of input XML data against XSD schemas.
//
///Schema Location and obtaining Schemas
///-------------------------------------
// In validating mode, the reader should be able to obtain external XSD
// schemas.  `balxml::ValidatingReader` requires that all schema sources be
// represented in the form of `bsl::streambuf` objects.  According to the W3C
// standard, information about external XSD schemas can be defined in three
// places:
//
// * In an instance document, the attribute `xsi:schemaLocation` provides
//   hints from the author to a processor regarding the location of schema
//   documents.  The `schemaLocation` attribute value consists of one or more
//   pairs of URI references, separated by white space.  The first member of
//   each pair is a namespace name, and the second member of the pair is a
//   hint describing where to find an appropriate schema document for that
//   namespace.  The presence of these hints does not require the processor to
//   obtain or use the cited schema documents, and the processor is free to
//   use other schemas obtained by any suitable means.  For example, XercesC
//   has a property, XercesSchemaExternalSchemaLocation, that informs the
//   parser about available schemas in exactly the same format as the
//   `schemaLocation` attribute in the document instance.
//
// Example:
// ```
//     <purchaseReport
//         xmlns="http://www.example.com/Report"
//         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
//         xsi:schemaLocation="http://www.example.com/Report
//                             http://www.example.com/Report.xsd"
//          period="P3M" periodEnding="1999-12-31">
// ```
//
// * In a schema, the `include` element has a required `schemaLocation`
//   attribute, and it contains a URI reference which must identify a schema
//   document.
// * Also in a schema, the `import` element has optional `namespace` and
//   `schemaLocation` attributes.  If present, the `schemaLocation` attribute
//   is understood in a way which parallels the interpretation of
//   `xsi:schemaLocation` in the first item above.  Specifically, it provides
//   a hint from the author to a processor regarding the location of a schema
//   document that the author warrants supplies the required components for
//   the namespace identified by the `namespace` attribute.
//
// For all of the cases mentioned above, given the URI reference that
// identifies a schema and an optional namespace, the processor (parser) should
// obtain a `bsl::streambuf` object for the schema.  For this purpose, the
// `balxml::ValidatingReader` interface defines a two-level schema resolution
// process:
//
// 1. The reader (parser) must look up the schema in its internal cache.  If
//    the schema is found, it must be used.
// 2. Otherwise, the reader must use the associated resolver to obtain the
//    schema (see `balxml::Reader::XmlResolverFunctor`).
//
// Both the schema cache and the resolver should be set up before the `open`
// method is called.
//
///Schema Cache
///------------
// `balxml::ValidatingReader` provides two abstract methods to maintain the
// schema cache:
//
// * `addSchema`, add a schema to the cache
// * `removeSchemas`, clear the cache and remove all schemas
//
///Thread Safety
///-------------
// This component does not provide any functions that present a thread safety
// issue, since the `balxml::ValidatingReader` class is abstract and cannot be
// instantiated.  There is no guarantee that any specific derived class will
// provide a thread-safe implementation.
//
///Usage
///-----
// This section illustrates intended use of this component.
//
///Example 1: Basic Usage
/// - - - - - - - - - - -
// In this example, we will create a validating parser that parses and
// validates a document against a schema.
// ```
// #include <a_xercesc_reader.h>
//
// #include <balxml_validatingreader.h>
// #include <balxml_errorinfo.h>
// #include <iostream>
// #include <sstream>
// ```
// The following string describes an XSD schema for the documents we are going
// to parse:
// ```
// const char TEST_XSD_STRING[] =
//    "<?xml version='1.0' encoding='UTF-8'?>"
//    "<xsd:schema xmlns:xsd='http://www.w3.org/2001/XMLSchema'"
//    "            xmlns='http://bloomberg.com/schemas/directory'"
//    "            targetNamespace='http://bloomberg.com/schemas/directory'"
//    "            elementFormDefault='qualified'"
//    "            attributeFormDefault='qualified' >"
//    " "
//    "<xsd:complexType name='entryType'>"
//    "    <xsd:sequence>"
//    "    <xsd:element name='name' type='xsd:string'/>"
//    "    <xsd:element name='phone'>"
//    "        <xsd:complexType>"
//    "        <xsd:simpleContent>"
//    "            <xsd:extension base='xsd:string'>"
//    "                <xsd:attribute name='phonetype' type='xsd:string'/>"
//    "            </xsd:extension>"
//    "        </xsd:simpleContent>"
//    "        </xsd:complexType>"
//    "    </xsd:element>"
//    "    <xsd:element name='address' type='xsd:string'/>"
//    "    </xsd:sequence>"
//    "</xsd:complexType>"
//    " "
//    "<xsd:element name='directory-entry' type='entryType'/>"
//    "</xsd:schema>";
// ```
// The following string describes correct XML for a conforming schema.  The
// top-level element contains one XML namespace attribute, with one embedded
// entry describing a user:
// ```
// const char TEST_GOOD_XML_STRING[] =
//   "<?xml version='1.0' encoding='UTF-8'?>\n"
//   "<directory-entry xmlns:dir='http://bloomberg.com/schemas/directory'\n"
//   "     xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n"
//   "     xsi:schemaLocation='http://bloomberg.com/schemas/directory  \n"
//   "                         aaa.xsd' >\n"
//   "    <name>John Smith</name>\n"
//   "    <phone dir:phonetype='cell'>212-318-2000</phone>\n"
//   "    <address/>\n"
//   "</directory-entry>\n";
// ```
// The following string describes invalid XML.  More specifically, the XML
// document is well-formed, but does not conform to our schema:
// ```
// const char TEST_BAD_XML_STRING[] =
//   "<?xml version='1.0' encoding='UTF-8'?>\n"
//   "<directory-entry xmlns:dir='http://bloomberg.com/schemas/directory'\n"
//   "     xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n"
//   "     xsi:schemaLocation='http://bloomberg.com/schemas/directory  \n"
//   "                         aaa.xsd' >\n"
//   "    <name>John Smith</name>\n"
//   "    <phone dir:phonetype='cell'>212-318-2000</phone>\n"
//   "</directory-entry>\n";
// ```
// Now we define a `parse` method for parsing an XML document and validating
// against an XSD schema:
// ```
// int parse(balxml::ValidatingReader *reader,
//           const char              *xmlData,
//           const char              *xsdSchema)
// {
// ```
// In order to read the XML, we first need to construct a
// `balxml::NamespaceRegistry` object and a `balxml::PrefixStack` object.  The
// supplied `reader` refers to an object of a concrete class derived from
// `balxml::ValidatingReader`.
// ```
//   balxml::NamespaceRegistry namespaces;
//   balxml::PrefixStack prefixStack(&namespaces);
//
//   ASSERT(!reader->isOpen());
// ```
// The reader uses a `balxml::PrefixStack` to manage namespace prefixes so we
// need to set it before we call open.
// ```
//   reader->setPrefixStack(&prefixStack);
//   ASSERT(reader->prefixStack() == &prefixStack);
// ```
// Setup validation
// ```
//   reader->removeSchemas();
//
//   reader->enableValidation(true);
//   ASSERT(reader->validationFlag());
//
//   bsl::istringstream schemaStream(xsdSchema);
//   reader->addSchema("aaa.xsd", schemaStream.rdbuf());
// ```
// Now we call the `open` method to set up the reader for parsing using the
// data contained in the XML string.
// ```
//   int rc = reader->open(xmlData, bsl::strlen(xmlData), 0, "UTF-8");
//   ASSERT(rc == 0);
// ```
// Confirm that the reader has opened properly.
// ```
//   ASSERT(reader->isOpen());
// ```
// Do actual document reading
// ```
//   while(1) {
//       rc = reader->advanceToNextNode ();
//       if (rc != 0) {
//           break;
//      }
// ```
//      process current node here
// ```
//   }
// ```
// Cleanup and close the reader.
// ```
//   reader->close();
//   ASSERT(!reader->isOpen());
//
//   reader->setPrefixStack(0);
//   ASSERT(reader->prefixStack() == 0);
//
//   return rc;
// }
// ```
// The main program parses XML strings using an `a_xercesc::Reader` object:
// ```
// int usageExample()
// {
//     a_xercesc::Reader  reader;
//
//     int rc = parse(&reader, TEST_GOOD_XML_STRING, TEST_XSD_STRING);
// ```
//  Normal end of data
// ```
//   ASSERT(rc==1);
//
//   rc = parse(&reader, TEST_BAD_XML_STRING, TEST_XSD_STRING);
// ```
//  Parser error - document validation failed
// ```
//   ASSERT(rc==-1);
//
//   return 0;
// }
// ```

#include <balscm_version.h>

#include <balxml_reader.h>

#include <bsls_keyword.h>

namespace BloombergLP {

namespace balxml {
                           // ======================
                           // class ValidatingReader
                           // ======================

/// This class defines a pure abstract protocol, extending the `Reader`
/// protocol, for XML readers that can validate the XML they parse against
/// XSD schemas.  In addition to the `Reader` interface, it declares methods
/// to turn validation on and off (`enableValidation`, `validationFlag`) and
/// to maintain a cache of pre-resolved schemas (`addSchema`,
/// `removeSchemas`).  All of these methods are pure virtual; their behavior
/// is supplied by concrete derived classes.
class ValidatingReader : public Reader {

  public:
    // CREATORS

    /// Destroy this object.
    ~ValidatingReader() BSLS_KEYWORD_OVERRIDE;

    // MANIPULATORS

    /// Enable XML validation if the specified `validationFlag` is `true`,
    /// and disable it otherwise.  A validating reader is often faster if
    /// validation is disabled.  This operation does not take effect until
    /// the next call to the `open` method.
    virtual void enableValidation(bool validationFlag) = 0;

    /// Associate the specified `location` with the XSD document in the
    /// specified `schema` stream.  Return 0 on success and a non-zero value
    /// on error.  This method may be called more than once to add multiple
    /// location-schema associations to a pre-resolved schema cache.  During
    /// XML validation, a reference to a schema with a specific location
    /// will be looked up in the schema cache.  The schema resolver is used
    /// to find the external schema only if this lookup fails.  (See
    /// `setResolver` in the `Reader` base class.)  The effect of calling
    /// this method when the reader is already open is not specified, and a
    /// derived class implementation may treat it as an error.  The behavior
    /// is undefined unless `schema` is a repositionable stream (i.e., it
    /// must be possible to read from the stream, then seek to the beginning
    /// of the stream and read the same bytes again).
    virtual int addSchema(const char *location, bsl::streambuf *schema) = 0;

    /// Remove all location-to-schema associations that were added using
    /// `addSchema`.  This method should be called after parsing is finished
    /// whenever the reader will be reused to parse other documents with
    /// different schemas.  Note that calling `close` on the reader *does*
    /// *not* remove the schemas.
    virtual void removeSchemas() = 0;

    // ACCESSORS

    /// Return `true` if the reader has validation turned on, and `false`
    /// otherwise.
    virtual bool validationFlag() const = 0;
};

}  // close package namespace
}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2015 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------
